import numpy as np
import pandas as pd
# Lift pandas' display limits so printed DataFrames show every column and row.
pd.set_option("display.max_columns",None)
pd.set_option("display.max_rows",None)
import warnings
# Silence every warning category for the rest of the run.
# NOTE(review): this also hides deprecation warnings from pandas/plotly.
warnings.filterwarnings("ignore")
import plotly.graph_objects as go
import plotly.express as px
import plotly.io as pio
# Use the "seaborn" template as the default look for all plotly figures.
pio.templates.default = "seaborn"
from plotly.subplots import make_subplots
# The 'date' column of each CSV is parsed into datetimes while loading.
date_columns = ['date']
confirmed_df = pd.read_csv("covid_19_confirmed_global.csv", parse_dates=date_columns)
deaths_df = pd.read_csv("covid_19_deaths_global.csv", parse_dates=date_columns)
recovered_df = pd.read_csv("covid_19_recovered_global.csv", parse_dates=date_columns)
cw_df = pd.read_csv("covid_19_country_global.csv", parse_dates=date_columns)

# Report (rows, columns) for each table, in load order.
for frame in (confirmed_df, deaths_df, recovered_df, cw_df):
    print(frame.shape)

# Preview the first rows of each table; as bare expressions these are only
# rendered by an interactive/notebook session, not by a plain script run.
confirmed_df.head()
deaths_df.head()
recovered_df.head()
cw_df.head()
# Count rows per date and order the counts by date, so the first and last
# index entries are the earliest and latest dates present in cw_df.
a = cw_df['date'].value_counts().sort_index()
first_date, last_date = a.index[0], a.index[-1]
print('The first date is:', first_date)
print('The last date is:', last_date)
# Animated density map of confirmed cases, one frame per date.
# Work on a copy: the date column is turned into display strings below, and
# confirmed_df itself must keep its original dates for later analysis.
df1 = confirmed_df.copy()
df1['date'] = pd.to_datetime(df1['date']).dt.strftime('%m/%d/%Y')
# Replace missing values with '-' only in non-numeric columns (e.g. hover
# text); filling numeric columns with a string would turn lat/long/confirmed
# into object columns that cannot be plotted.
text_columns = df1.select_dtypes(exclude='number').columns
df1[text_columns] = df1[text_columns].fillna('-')
fig = px.density_mapbox(df1, lat='lat', lon='long', z='confirmed', radius=20, zoom=1,
                        hover_data=["country", 'state', "confirmed"], mapbox_style="carto-positron",
                        animation_frame='date', range_color=[0, 2000], title='COVID-19 Spread Analysis')
fig.update_layout(margin={"r": 0, "t": 30, "l": 0, "b": 0})
fig.show()
# Active cases = confirmed - deaths - recovered
cw_df['active'] = cw_df['confirmed'] - cw_df['deaths'] - cw_df['recovered']
# Keep only the rows of the most recent date, then total them per country.
top = cw_df[cw_df['date'] == cw_df['date'].max()]
# Select several columns with a list: the tuple form
# (['confirmed','active','deaths'] without the inner brackets) was deprecated
# in pandas 1.0 and raises in pandas 2.0.
world = top.groupby('country')[['confirmed', 'active', 'deaths']].sum().reset_index()
world.head()
# World choropleth shaded by each country's active-case total from `world`.
fig = px.choropleth(
    world,
    locations="country",
    locationmode='country names',
    color="active",
    hover_name="country",
    range_color=[1,20000],
    color_continuous_scale="Peach",
    title='Active cases in each country',
)
figure_size = dict(width=1000, height=640)
fig.update_layout(**figure_size)
fig.show()
# Scatter map of recovered counts drawn over a USGS satellite-imagery raster layer.
# NOTE(review): recovered_df is plotted unfiltered; if it holds one row per
# location per date, every date is drawn as its own marker - confirm whether
# only the latest date was intended.
usgs_imagery_layer = {
    "below": 'traces',
    "sourcetype": "raster",
    "source": [
        "https://basemap.nationalmap.gov/arcgis/rest/services/USGSImageryOnly/MapServer/tile/{z}/{y}/{x}"
    ],
}
fig = px.scatter_mapbox(
    recovered_df,
    lat="lat",
    lon="long",
    hover_name="country",
    hover_data=["country","recovered"],
    color_discrete_sequence=["mediumspringgreen"],
    zoom=1,
    height=480,
    title='Recovered count of each country',
)
# A blank base style lets the raster layer act as the map background.
fig.update_layout(mapbox_style="white-bg", mapbox_layers=[usgs_imagery_layer])
fig.update_layout(margin={"r":0,"t":30,"l":0,"b":0})
fig.show()
# Marker size is the fourth root of the death count, which compresses the
# spread between small and large totals.
world['size'] = world['deaths'] ** 0.25
fig = px.scatter_geo(
    world,
    locations='country',
    locationmode='country names',
    color='deaths',
    color_continuous_scale='plotly3',
    hover_name='country',
    size='size',
    hover_data=['country','deaths'],
    projection='natural earth',
    title='Death count of each country',
)
fig.show()
# Worldwide confirmed cases per date.
# Group on real datetimes rather than on whatever the column currently holds:
# if the dates are 'mm/dd/YYYY' strings, string ordering would put the groups
# out of chronological order across years. groupby sorts its keys, so the
# result is chronological. Only 'confirmed' is selected - the grouping key
# must not be selected again, and tuple-style multi-column selection raises
# in pandas 2.0.
total_cases = (
    confirmed_df.groupby(pd.to_datetime(confirmed_df['date']))['confirmed']
    .sum()
    .reset_index()
)
fig = go.Figure()
fig.add_trace(go.Scatter(x=total_cases['date'], y=total_cases['confirmed'],
                         mode='lines+markers', line=dict(color='orangered', width=2)))
fig.update_layout(title='Worldwide Confirmed Cases Over Time',
                  xaxis_title='Date',
                  yaxis_title='Total Cases')
fig.show()
def _top_countries(latest, metric, limit=20):
    """Return the `limit` countries with the largest summed `metric`.

    `latest` is a DataFrame with a 'country' column and a `metric` column.
    The result has the columns 'country' and `metric`, sorted descending.
    """
    return (
        latest.groupby(by='country')[metric]
        .sum()
        .sort_values(ascending=False)
        .head(limit)
        .reset_index()
    )


def _country_bar(frame, metric, title, xaxis_title='Total cases'):
    """Build a horizontal bar chart of `metric` per country, one colour per bar."""
    chart = px.bar(frame, x=metric, y='country', orientation='h',
                   color='country', color_discrete_sequence=px.colors.qualitative.Plotly)
    chart.update_layout(title=title,
                        xaxis_title=xaxis_title,
                        yaxis_title='Country', showlegend=False)
    return chart


# Rows of the most recent date only; every ranking below is based on them.
top = cw_df[cw_df['date'] == cw_df['date'].max()]

top_casualities = _top_countries(top, 'confirmed')
top_casualities
fig = _country_bar(top_casualities, 'confirmed', 'Top 20 countries having most confirmed cases')
fig.show()

top_actives = _top_countries(top, 'active')
top_actives
fig = _country_bar(top_actives, 'active', 'Top 20 countries having most active cases')
fig.show()

top_deaths = _top_countries(top, 'deaths')
top_deaths
fig = _country_bar(top_deaths, 'deaths', 'Top 20 countries having most deaths')
fig.show()

top_recovered = _top_countries(top, 'recovered')
top_recovered
fig = _country_bar(top_recovered, 'recovered', 'Top 20 countries having most recovered cases')
fig.show()
# Per-country totals on the latest date, plus recovery and death rates in percent.
# A list (not a tuple) selects the columns: the tuple form raises in pandas 2.0.
rate = top.groupby(by='country')[['recovered', 'confirmed', 'deaths']].sum().reset_index()
# Countries with no confirmed cases have no defined rate; masking the
# denominator yields NaN for them instead of inf from a division by zero.
confirmed_cases = rate['confirmed'].where(rate['confirmed'] > 0)
rate['recovery_percentage'] = (rate['recovered'] / confirmed_cases * 100).round(2)
rate['death_percentage'] = (rate['deaths'] / confirmed_cases * 100).round(2)
rate.head()
# Rank countries by death percentage and chart the 20 highest.
death_pct_by_country = rate.groupby(by='country')['death_percentage'].sum()
mortality = (
    death_pct_by_country
    .sort_values(ascending=False)
    .head(20)
    .reset_index()
)
mortality
fig = px.bar(
    mortality,
    x='death_percentage',
    y='country',
    orientation='h',
    color='country',
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
mortality_layout = dict(
    title='Top 20 countries having highest mortality rate',
    xaxis_title='Mortality rate in percentage',
    yaxis_title='Country',
    showlegend=False,
)
fig.update_layout(**mortality_layout)
fig.show()
# Rank countries by recovery percentage and chart the 20 highest.
recovery_pct_by_country = rate.groupby(by='country')['recovery_percentage'].sum()
recovery = (
    recovery_pct_by_country
    .sort_values(ascending=False)
    .head(20)
    .reset_index()
)
recovery
fig = px.bar(
    recovery,
    x='recovery_percentage',
    y='country',
    orientation='h',
    color='country',
    color_discrete_sequence=px.colors.qualitative.Plotly,
)
recovery_layout = dict(
    title='Top 20 countries having highest recovery rate',
    xaxis_title='Recovery rate in percentage',
    yaxis_title='Country',
    showlegend=False,
)
fig.update_layout(**recovery_layout)
fig.show()